% Simulator for a Markov Chain
% Embedded in it is the Q-Learning algorithm
%
% Driver script: seeds the random number generator, loads the global
% problem parameters, repeatedly calls jump_learn until it signals
% completion, extracts a policy with pol_finder, prints stat.beta and
% plots the recorded J_plot* traces against stat.iter_plot.
clear
clc

% Fixed seed so that repeated runs draw the same random numbers.
% NOTE(review): rand('twister',seed) is a legacy seeding syntax; kept as is
% so the random stream of existing runs is not altered.
rand('twister',25);

global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM

global5_2 % global parameters initialized

% Bookkeeping structure handed to (and returned by) jump_learn.
% NOTE(review): zeros(NS) is an NS-by-NS matrix, not a vector -- confirm
% against how jump_learn indexes stat.J.
stat=struct('J',zeros(NS),'beta',zeros(NS,NA),'iter',0,'old_action',1,...
    'old_state',1,'current_state',1,'rimm',0,'total_reward',0,...
    'explore',0.5,'flag',0,...
    'J_plot1',zeros(NS,1),'J_plot2',zeros(NS,1),'iter_plot',zeros(NS,1));

done=0; % Mnemonic for simulation: 1 stands for end, 0 stands for continue

while 0==done
    [stat,done]=jump_learn(stat);
end

policy=pol_finder(stat);

% Deliberately left without a semicolon so the values are printed.
actor_values=stat.beta

% Only J_plot1 and J_plot2 are created in this script; any further traces
% (J_plot3, J_plot4, ...) exist only if jump_learn added them. Build the
% plot argument list from the consecutively numbered traces that are
% actually present instead of hard-coding five of them, so the script does
% not fail on a missing field.
plot_args={};
trace_no=1;
while isfield(stat,sprintf('J_plot%d',trace_no))
    trace_name=sprintf('J_plot%d',trace_no);
    plot_args=[plot_args,{stat.iter_plot,stat.(trace_name),'-'}]; %#ok<AGROW>
    trace_no=trace_no+1;
end

figure
plot(plot_args{:},'Linewidth', 2);
xlabel('NUMBER OF ITERATIONS TIMES 100');
ylabel('VALUE FUNCTION')
 
%figure 
%plot(stat.J_plot2,'-','Linewidth', 2);
 %xlabel('NUMBER OF ITERATIONS TIMES 100');
 %ylabel('J(2)')


